/* ***************************************************** **
   ch20_finding_abnormal_peaks.sql
   
   Skrypt dla książki Praktyczna nauka SQL dla Oracle, Helion (2022),
   napisanej przez Kima Berga Hansena, https://www.kibeha.dk
   Używasz na własną odpowiedzialność.
   *****************************************************
   
   Rozdział 20.
   Wyszukiwanie nagłych skoków
   
   Skrypt przeznaczony do wykonania w schemacie PRACTICAL
** ***************************************************** */

/* -----------------------------------------------------
   sqlcl formatting setup
   ----------------------------------------------------- */

-- Number of lines on each output page
set pagesize 80
-- Width of an output line in characters
set linesize 80
-- SQLcl output format used for query results
set sqlformat ansiconsole

-- Show DATE values as year-month-day for the rest of this session
alter session set nls_date_format = 'YYYY-MM-DD';

/* -----------------------------------------------------
   Przykładowy kod do rozdziału 20.
   ----------------------------------------------------- */

-- Listing 20.2. Pages in our example web application
-- One row per page, together with the name of the application it belongs to.

select
   pg.app_id
 , app.name as app_name
 , pg.page_no
 , pg.friendly_url
from web_pages pg
inner join web_apps app
   on app.id = pg.app_id
order by
   pg.app_id
 , pg.page_no;

-- Listing 20.3. Historical page visit counter data
-- Counter rows for application 542 only, listed page by page in day order.

select
   h.friendly_url
 , h.day
 , h.counter
from web_page_counter_hist h
where h.app_id = 542
order by
   h.page_no
 , h.day;

-- Listing 20.4. Finding the days where the page visit counter grew by at least 200
-- Consecutive qualifying days of a page are collapsed into one output row.

with counter_hist as (
   -- Only the columns that the pattern matching reads
   select page_no, friendly_url, day, counter
   from web_page_counter_hist
)
select
   url
 , from_day
 , to_day
 , days
 , begin
 , growth
 , daily
from counter_hist
match_recognize(
   partition by page_no
   order by day
   measures
      first(friendly_url) as url
    , first(day) as from_day
    , last(day) as to_day
    , count(*) as days
    , first(counter) as begin
      -- growth: counter on the row after the match minus counter on its first row
    , next(counter) - first(counter) as growth
      -- daily: growth spread evenly over the matched days
    , (next(counter) - first(counter)) / count(*) as daily
   one row per match
   after match skip past last row
   pattern ( spike+ )
   define
      -- A row qualifies when the following row's counter is at least 200 higher
      spike as next(counter) >= counter + 200
)
order by
   page_no
 , from_day;

-- Explicit use of final and last
-- Growth of at least 200 per day again, with the measures written using
-- explicit final aggregates and explicit last(...) navigation.

with counter_hist as (
   -- Only the columns that the pattern matching reads
   select page_no, friendly_url, day, counter
   from web_page_counter_hist
)
select
   url
 , from_day
 , to_day
 , days
 , begin
 , growth
 , daily
from counter_hist
match_recognize(
   partition by page_no
   order by day
   measures
      first(friendly_url) as url
    , first(day) as from_day
    , final last(day) as to_day
    , final count(*) as days
    , first(counter) as begin
      -- growth: counter on the row after the last matched row minus the first counter
    , next(final last(counter)) - first(counter) as growth
      -- daily: growth spread evenly over the matched days
    , (next(final last(counter)) - first(counter)) / final count(*) as daily
   one row per match
   after match skip past last row
   pattern ( spike+ )
   define
      -- A row qualifies when the following row's counter is at least 200 higher
      spike as next(counter) >= counter + 200
)
order by
   page_no
 , from_day;

-- Listing 20.5. Finding the days where the counter grew by at least 4%
-- Consecutive qualifying days of a page are collapsed into one output row.
--   pct   : percentage growth from the first counter of the period to the
--           counter on the row after the period
--   daily : pct divided by the number of days in the period

select
   url, from_day, to_day, days, begin, pct, daily
from web_page_counter_hist
match_recognize(
   partition by page_no
   order by day
   measures
      first(friendly_url) as url
    , first(day) as from_day
    , final last(day) as to_day
    , final count(*) as days
    , first(counter) as begin
    , round(
         100 * (next(final last(counter)) / first(counter))
             - 100
       , 1
      ) as pct
    , round(
         (100 * (next(final last(counter)) / first(counter))
                  - 100) / final count(*)
       , 1
      ) as daily
   one row per match
   after match skip past last row
   pattern ( peak+ )
   define
      -- nullif makes the ratio null for a zero counter, so such a row is simply
      -- not a peak and the query does not fail with a divide by zero error.
      -- Every match therefore starts on a non-zero counter, which also keeps
      -- the divisions by first(counter) in the measures safe.
      peak as next(counter) / nullif(counter, 0) >= 1.04
)
order by page_no, from_day;

-- Periods in which the growth averaged at least 4% per day
-- A period keeps extending as long as the total growth since its first day,
-- divided by the number of days matched so far, stays at or above 4%.
--   pct   : percentage growth from the first counter of the period to the
--           counter on the row after the period
--   daily : pct divided by the number of days in the period

select
   url, from_day, to_day, days, begin, pct, daily
from web_page_counter_hist
match_recognize(
   partition by page_no
   order by day
   measures
      first(friendly_url) as url
    , first(day) as from_day
    , final last(day) as to_day
    , final count(*) as days
    , first(counter) as begin
    , round(
         100 * (next(final last(counter)) / first(counter))
             - 100
       , 1
      ) as pct
    , round(
         (100 * (next(final last(counter)) / first(counter))
                  - 100) / final count(*)
       , 1
      ) as daily
   one row per match
   after match skip past last row
   pattern ( peak+ )
   define
      -- nullif makes the ratio null when the period would start on a zero
      -- counter, so no match starts there and the query does not fail with a
      -- divide by zero error. This also keeps the divisions by first(counter)
      -- in the measures safe.
      peak as ((next(counter) / nullif(first(counter), 0)) - 1)
                 / running count(*)  >= 0.04
)
order by page_no, from_day;

-- Listing 20.6. Showing the number of daily page visits
-- The table stores a running counter, so the visits of a day are derived as
-- the counter of the following row (by day, within the page) minus its own.

select
   h.friendly_url
 , h.day
 , lead(h.counter, 1) over (
      partition by h.page_no
      order by h.day
   ) - h.counter as visits
from web_page_counter_hist h
order by
   h.page_no
 , h.day;

-- Listing 20.7. Showing periods where daily page visits were at least 50 higher than the previous day
-- Daily visits are derived inline from counter differences. Every day of a
-- period is compared with the visits of the day just before the period began.

with counter_hist as (
   -- Only the columns that the pattern matching reads
   select page_no, friendly_url, day, counter
   from web_page_counter_hist
)
select
   url
 , from_day
 , to_day
 , days
 , begin
 , p_v
 , f_v
 , t_v
 , d_v
from counter_hist
match_recognize(
   partition by page_no
   order by day
   measures
      first(friendly_url) as url
    , first(day) as from_day
    , final last(day) as to_day
    , final count(*) as days
    , first(counter) as begin
      -- p_v: visits on the day before the period
    , first(counter) - prev(first(counter)) as p_v
      -- f_v: visits on the first day of the period
    , next(first(counter)) - first(counter) as f_v
      -- t_v: total visits over the whole period
    , next(final last(counter)) - first(counter) as t_v
      -- d_v: average visits per day in the period
    , round(
         (next(final last(counter)) - first(counter)) / final count(*)
       , 1
      ) as d_v
   one row per match
   after match skip past last row
   pattern ( spike+ )
   define
      -- visits of the current row minus visits of the day before the period
      spike as (next(counter) - counter)
                - (first(counter) - prev(first(counter))) >= 50
)
order by
   page_no
 , from_day;

-- Listing 20.8. Precalculating the daily page visits to simplify the solution code
-- Same search as Listing 20.7 (visits at least 50 higher than on the day before
-- the period), with the daily visits computed up front by lead().
--   p_v : visits on the day before the period
--   f_v : visits on the first day of the period
--   t_v : total visits over the period
--   d_v : average visits per day in the period

with daily_visits as (
   -- visits = counter of the following row (by day, within the page) minus own counter
   select
      page_no, friendly_url, day, counter
    , lead(counter) over (
         partition by page_no order by day
      ) - counter as visits
   from web_page_counter_hist
)
select
   url, from_day, to_day, days, begin, p_v, f_v, t_v, d_v
from daily_visits
match_recognize(
   partition by page_no
   order by day
   measures
      first(friendly_url) as url
    , first(day) as from_day
    , final last(day) as to_day
    , final count(*) as days
    , first(counter) as begin
    , prev(first(visits)) as p_v
    , first(visits) as f_v
    , final sum(visits) as t_v
      -- Rounded to one decimal so d_v agrees with Listing 20.7
    , round(final avg(visits), 1) as d_v
   one row per match
   after match skip past last row
   pattern ( peak+ )
   define
      peak as visits - prev(first(visits)) >= 50
)
order by page_no, from_day;

-- Listing 20.9. Showing periods where daily page visits were at least 50% higher than the previous day
-- Every day of a period is compared with the visits of the day just before
-- the period began.
--   p_v   : visits on the day before the period
--   f_v   : visits on the first day of the period
--   t_v   : total visits over the period
--   d_pct : average daily visits in the period as a percentage change from p_v

with daily_visits as (
   -- visits = counter of the following row (by day, within the page) minus own counter
   select
      page_no, friendly_url, day, counter
    , lead(counter) over (
         partition by page_no order by day
      ) - counter as visits
   from web_page_counter_hist
)
select
   url, from_day, to_day, days, begin, p_v, f_v, t_v, d_pct
from daily_visits
match_recognize(
   partition by page_no
   order by day
   measures
      first(friendly_url) as url
    , first(day) as from_day
    , final last(day) as to_day
    , final count(*) as days
    , first(counter) as begin
    , prev(first(visits)) as p_v
    , first(visits) as f_v
    , final sum(visits) as t_v
      -- Based on the average of the period: dividing (100 * sum / p_v - 100)
      -- by the day count would leave only 100 / days subtracted and overstate
      -- the change for periods longer than one day.
      -- p_v is never zero or null here because the definition below rejects
      -- such rows as the start of a match.
    , round(
         100 * final avg(visits) / prev(first(visits)) - 100
       , 1
      ) as d_pct
   one row per match
   after match skip past last row
   pattern ( peak+ )
   define
      -- nullif avoids a divide by zero error when the reference day had no visits
      peak as visits / nullif(prev(first(visits)), 0) >= 1.5
)
order by page_no, from_day;

-- Listing 20.10. Showing periods where daily page visits were at least 50% higher than the average daily visits
--   avg_v : average daily visits of the page over its whole history
--   t_v   : total visits over the period
--   d_v   : average visits per day in the period
--   d_pct : d_v as a percentage change from the page average

with daily_visits as (
   -- visits = counter of the following row (by day, within the page) minus own counter
   select
      page_no, friendly_url, day, counter
    , lead(counter) over (
         partition by page_no order by day
      ) - counter as visits
   from web_page_counter_hist
), visits_with_avg as (
   -- Attach the per-page average to every row
   select
      page_no, friendly_url, day, counter, visits
    , avg(visits) over (
         partition by page_no
      ) as avg_visits
   from daily_visits
)
select
   url, avg_v, from_day, to_day, days, t_v, d_v, d_pct
from visits_with_avg
match_recognize(
   partition by page_no
   order by day
   measures
      first(friendly_url) as url
    , round(first(avg_visits), 1) as avg_v
    , first(day) as from_day
    , final last(day) as to_day
    , final count(*) as days
    , final sum(visits) as t_v
    , round(final avg(visits), 1) as d_v
    , round(
         (100 * final avg(visits) / avg_visits) - 100
       , 1
      ) as d_pct
   one row per match
   after match skip past last row
   pattern ( peak+ )
   define
      -- nullif makes the ratio null for a page whose average is zero, so that
      -- page yields no matches and the query does not fail with a divide by
      -- zero error. Matches only exist for a non-zero average, which keeps the
      -- division in d_pct safe.
      peak as visits / nullif(avg_visits, 0) >= 1.5
)
order by page_no, from_day;

-- Daily visits are at least 80% lower than the average number of visits
--   avg_v : average daily visits of the page over its whole history
--   t_v   : total visits over the period
--   d_v   : average visits per day in the period
--   d_pct : d_v as a percentage change from the page average

with daily_visits as (
   -- visits = counter of the following row (by day, within the page) minus own counter
   select
      page_no, friendly_url, day, counter
    , lead(counter) over (
         partition by page_no order by day
      ) - counter as visits
   from web_page_counter_hist
), visits_with_avg as (
   -- Attach the per-page average to every row
   select
      page_no, friendly_url, day, counter, visits
    , avg(visits) over (
         partition by page_no
      ) as avg_visits
   from daily_visits
)
select
   url, avg_v, from_day, to_day, days, t_v, d_v, d_pct
from visits_with_avg
match_recognize(
   partition by page_no
   order by day
   measures
      first(friendly_url) as url
    , round(first(avg_visits), 1) as avg_v
    , first(day) as from_day
    , final last(day) as to_day
    , final count(*) as days
    , final sum(visits) as t_v
    , round(final avg(visits), 1) as d_v
    , round(
         (100 * final avg(visits) / avg_visits) - 100
       , 1
      ) as d_pct
   one row per match
   after match skip past last row
   pattern ( peak+ )
   define
      -- nullif makes the ratio null for a page whose average is zero, so that
      -- page yields no matches and the query does not fail with a divide by
      -- zero error. Matches only exist for a non-zero average, which keeps the
      -- division in d_pct safe.
      peak as visits / nullif(avg_visits, 0) <= 0.2
)
order by page_no, from_day;

-- Listing 20.11. Finding multiple peak classifications simultaneously
-- A period is reported when it is at least 1 day at 4x the page average,
-- at least 2 days at 2x, or at least 3 days at 1.1x. The alternatives are
-- listed from the highest threshold to the lowest.
--   class : pattern variable that the period was matched as
--   avg_v : average daily visits of the page over its whole history
--   t_v   : total visits over the period
--   d_v   : average visits per day in the period
--   d_pct : d_v as a percentage change from the page average

with daily_visits as (
   -- visits = counter of the following row (by day, within the page) minus own counter
   select
      page_no, friendly_url, day, counter
    , lead(counter) over (
         partition by page_no order by day
      ) - counter as visits
   from web_page_counter_hist
), visits_with_avg as (
   -- Attach the per-page average to every row
   select
      page_no, friendly_url, day, counter, visits
    , avg(visits) over (
         partition by page_no
      ) as avg_visits
   from daily_visits
)
select
   url, avg_v, from_day, days, class, t_v, d_v, d_pct
from visits_with_avg
match_recognize(
   partition by page_no
   order by day
   measures
      first(friendly_url) as url
    , round(first(avg_visits), 1) as avg_v
    , first(day) as from_day
    , final count(*) as days
    , classifier() as class
    , final sum(visits) as t_v
    , round(final avg(visits), 1) as d_v
    , round(
         (100 * final avg(visits) / avg_visits) - 100
       , 1
      ) as d_pct
   one row per match
   after match skip past last row
   pattern ( high{1,} | medium{2,} | low{3,} )
   define
      -- nullif makes each ratio null for a page whose average is zero, so that
      -- page yields no matches and the query does not fail with a divide by
      -- zero error. Matches only exist for a non-zero average, which keeps the
      -- division in d_pct safe.
      high   as visits / nullif(avg_visits, 0) >= 4
    , medium as visits / nullif(avg_visits, 0) >= 2
    , low    as visits / nullif(avg_visits, 0) >= 1.1
)
order by page_no, from_day;

-- Listing 20.12. Finding peaks of a particular shape
-- A period must consist of one or more high days, followed by one or more
-- medium days, followed by one or more low days.
--   hi / med / low : number of days matched as each pattern variable
--   avg_v : average daily visits of the page over its whole history
--   t_v   : total visits over the period
--   d_v   : average visits per day in the period
--   d_pct : d_v as a percentage change from the page average

with daily_visits as (
   -- visits = counter of the following row (by day, within the page) minus own counter
   select
      page_no, friendly_url, day, counter
    , lead(counter) over (
         partition by page_no order by day
      ) - counter as visits
   from web_page_counter_hist
), visits_with_avg as (
   -- Attach the per-page average to every row
   select
      page_no, friendly_url, day, counter, visits
    , avg(visits) over (
         partition by page_no
      ) as avg_visits
   from daily_visits
)
select
   url, avg_v, from_day, days, hi, med, low, t_v, d_v, d_pct
from visits_with_avg
match_recognize(
   partition by page_no
   order by day
   measures
      first(friendly_url) as url
    , round(first(avg_visits), 1) as avg_v
    , first(day) as from_day
    , final count(*) as days
    , final count(high.*) as hi
    , final count(medium.*) as med
    , final count(low.*) as low
    , final sum(visits) as t_v
    , round(final avg(visits), 1) as d_v
    , round(
         (100 * final avg(visits) / avg_visits) - 100
       , 1
      ) as d_pct
   one row per match
   after match skip past last row
   pattern ( high+ medium+ low+ )
   define
      -- nullif makes each ratio null for a page whose average is zero, so that
      -- page yields no matches and the query does not fail with a divide by
      -- zero error. Matches only exist for a non-zero average, which keeps the
      -- division in d_pct safe.
      high   as visits / nullif(avg_visits, 0) >= 2.5
    , medium as visits / nullif(avg_visits, 0) >= 1.5
    , low    as visits / nullif(avg_visits, 0) >= 1.1
)
order by page_no, from_day;

/* ***************************************************** */
